1 Input parameters

Code
printParams(params)
Table 1.1: Input parameters.

2 Read repertoires

Code
# Read repertoire
db <- readInput(params[['input']], col_select = NULL)

if (params$species =="auto") {
    if ("species" %in% colnames(db)) {
        species <- unique(db[['species']])
        if (length(species)>1) {
            stop("Multiple species detected. Only one allowed.")
        }
    } else  {
        stop("Can't detect species. The column `species` does not exist in `db`.")
    }
} else {
    species <- params$species
}

# Check for single cell label
if (!is.null(singlecell)) {
    if (singlecell %in% colnames(db)) {
         db[[ singlecell ]] <- as.logical(db[[ singlecell ]] )   
    } else {
        stop("`",singlecell, "` is not a valid field in `db`.")
    }
} else {
   singlecell <- "single_cell"
   db[[singlecell]] <- F
   if ("cell_id" %in% colnames(db)) {
       message("Setting `singlecell` using `cell_id`.") 
       db[[singlecell]][!db[['cell_id']] %in% c(NA, '')] <- T
   }

}

if (!"locus" %in% colnames(db)) {
    db[['locus']] <- getLocus(db[['v_call']])
}
heavy_chains <- isHeavyChain(db[['locus']])

if ("clone_id" %in% colnames(db)) {
    if (params$force) {
        # Reset if force
        warning("Overwritting clone_id.")
        db$clone_id <- NULL
    }   
    # Reset always if clone_id exists
    if ("clone_size_count" %in% colnames(db)) {    
        warning("Overwritting clone_size_count.")    
        db$clone_size_count <- NULL
    }
    if ("clone_size_freq" %in% colnames(db)) {    
        warning("Overwritting clone_size_freq.")    
        db$clone_size_freq <- NULL
    }
}
Code
input_size <- nrow(db)
input_sizes <- db %>%
    group_by(!!!rlang::syms(unique(c("input_file")))) %>%
    summarize(input_size=n())

# Track input size of the expected output groups, for the
# end of report summary
input_sizes_byoutput <- db %>%
    group_by(!!!rlang::syms(unique(params$outputby))) %>%
    mutate(
        num_input_files = length(unique(input_file)),
        input_files = paste(unique(input_file), collapse=",")
    ) %>%
    ungroup() %>%
    group_by(!!!rlang::syms(unique(c(params$outputby, "input_file", "num_input_files", "input_files")))) %>%
    summarize(
        input_size = n()
    )

Number of sequences loaded: 7715. Number of heavy chain sequences loaded: 7715.

Code
input_samples_summary <- db %>%
   group_by(sample_id, subject_id, tissue) %>%
   summarize(size = n()) %>%
   arrange(subject_id)

eetable(input_samples_summary)$table
Table 2.1: Input samples summary.

2.1 Sequences per locus

Code
input_locus_summary <- db %>%
   group_by(!!!rlang::syms(unique(c("sample_id",params$cloneby, "locus")))) %>%
   summarize(n=n(), .groups="drop") %>%
   pivot_wider(names_from=locus, values_from=n) %>%
   rowwise() %>%
   mutate(Total = sum(!!!rlang::syms(unique(db[['locus']]))))

total <- data.frame(list("sample_id"="Total", 
                    t(input_locus_summary %>%
   select(!!!rlang::syms(c(unique(db[['locus']]), "Total"))) %>%
   colSums(na.rm = T))))

input_locus_summary <- bind_rows(input_locus_summary, total)

tab_caption <- paste0("Input data. Number of sequences in each ", 
                   paste("sample", params$cloneby, sep=", "),
                   " and locus."
      )
eetable(input_locus_summary)$table
Table 2.2: Input data. Number of sequences in each sample, subject_id and locus.
Code
cat("## Sequences per c_call\n")

2.2 Sequences per c_call

Code
input_c_call_summary <- db %>%
   group_by(!!!rlang::syms(unique(c("sample_id",params$cloneby, "c_call")))) %>%
   summarize(n=n(), .groups="drop") %>%
       pivot_wider(names_from=c_call, values_from=n)

tab_caption <- paste0("Input data. Number of sequences in each ", 
                   paste("sample", params$cloneby, sep=", "),
                   " and c_call"
      )
eetable(input_c_call_summary)$table
Table 2.3: Input data. Number of sequences in each sample, subject_id and c_call
Code
cat("## Sequences per constant region\n")

2.3 Sequences per constant region

Code
# Create c_gene column only for plotting (will be removed before storing the df again)
db <- db %>% filter(locus %in% c("IGH", "IGK", "IGL")) %>%
    mutate(c_gene = alakazam::getGene(c_call, first=TRUE))

input_cgene_summary <- db %>%
   group_by(!!!rlang::syms(unique(c("sample_id",params$cloneby, "c_gene")))) %>%
   summarize(n=n(), .groups="drop") %>%
       pivot_wider(names_from=c_gene, values_from=n)

tab_caption <- paste0("Input data. Number of sequences in each ", 
                   paste("sample", params$cloneby, sep=", "),
                   " and c_gene"
      )
eetable(input_cgene_summary)$table
Table 2.4: Input data. Number of sequences in each sample, subject_id and c_gene

3 Clonal assignment

Clonal assignment performed with scoper::hierarchicalClones, version 1.3.0 within subject_id.

To know more details about the method, visit the documentation website https://scoper.readthedocs.io/en/

Code
if (sum(heavy_chains)>0) {
    if (params$model == "hierarchical") {
        if (all(db[[ singlecell ]] == T )) {
            cell_id <- 'cell_id'
        } else {
            db_light <- db %>% filter(!isHeavyChain(locus))
            cell_id <- NULL
            if (all(c(T,F) %in% db[[ singlecell ]])) {
                warning("Mix of single and bulk data. Setting cell_id=`NULL`.")
            }
        }
        db <- hierarchicalClones(db, 
                                 params$threshold, 
                                 method=params$method,
                                 linkage=params$linkage, 
                                 normalize="len",
                                 junction="junction", 
                                 v_call="v_call", j_call="j_call", 
                                 clone="clone_id", 
                                 fields=params$cloneby,
                                 cell_id=cell_id, 
                                 locus="locus", 
                                 only_heavy=TRUE, 
                                 split_light=TRUE,
                                 first=FALSE, 
                                 cdr3=FALSE, mod3=FALSE, 
                                 max_n=0, nproc=params$nproc,
                                 verbose=FALSE, log=NULL,
                                 summarize_clones=FALSE) 
    } else {
        stop("Unsuported model requested. Supported models: hierarchical")
    }
} else {
    warning("No heavy chain sequences found.")
    db$clone_id <- NA
}

7710 sequences passed the clonal assignment step and 5 were removed. 0 sequences have clone_id==NA.

3.1 Create germlines

Code
dowser_v <- packageVersion("dowser")

Identification of the V(D)J germline sequences from which each of the observed sequences is derived is performed with dowser::createGermlines, version 2.3. These reconstructed germlines will be used in downstream analysis to infer somatic mutations and reconstruct lineages.

dowser::createGermlines takes the alignment information in the rearrangement file as well as the reference database used by the alignment software and generates a germline sequence for each individual observed sequence. Because clonal relations have already been inferred, the function assigns the same germline to all sequences belonging to the same clone.

Two types of germlines are created, germline_alignment and germline_alignment_d_mask. The last one has the D region masked, meaning that all nucleotides in the N/P and D-segments are replaced with N’s. This is often done because the germline base calls from this region are unreliable for B cell receptor alignments.

Documentation for dowser::createGermlines is available here: https://dowser.readthedocs.io/en/latest/topics/createGermlines.

Code
pre_germ_size <- nrow(db)
db[['tmp_nrow']] <- 1:nrow(db)

# download and unzip if needed
imgt_db <- prepareIMGT(params$imgt_db)

references <- dowser::readIMGT(file.path(imgt_db,
                                species, "vdj"),
                               quiet=TRUE)
## [1] "Read in 1199 from 17 fasta files"
Code
# tmp fix (add unique sequence id ) for
# Error in dowser::createGermlines(db_sp, references, locus = "locus",  :
#   Sequence IDs are not unique!
dup_ids <- db %>%
    ungroup() %>%
    group_by(!!!rlang::syms(c(params$cloneby))) %>%
    mutate(dup=duplicated(sequence_id)) %>%
    pull(dup) %>% any()

if (dup_ids) {
    db <- db %>%
        mutate(sequence_id = paste(sample_id, sequence_id, sep = '_'))
}

is_na_clone <- is.na(db[["clone_id"]])
not_na_clone_db <- data.frame()
na_clone_db <- data.frame()
if (any(is_na_clone)) {
    na_clone_db <- dowser::createGermlines(
        db[is_na_clone,,drop=FALSE],
        references,
        locus = "locus",
        nproc = params$nproc,
        seq = "sequence_alignment",
        v_call = "v_call",
        d_call = "d_call",
        j_call = "j_call",
        amino_acid = FALSE,
        id = "sequence_id",
        clone = "tmp_nrow",
        v_germ_start = "v_germline_start",
        v_germ_end = "v_germline_end",
        v_germ_length = "v_germline_length",
        d_germ_start = "d_germline_start",
        d_germ_end = "d_germline_end",
        d_germ_length = "d_germline_length",
        j_germ_start = "j_germline_start",
        j_germ_end = "j_germline_end",
        j_germ_length = "j_germline_length",
        np1_length = "np1_length",
        np2_length = "np2_length",
        na.rm = TRUE,
        fields = params$cloneby
    )    
}

if (any(!is_na_clone)) {
    not_na_clone_db <- dowser::createGermlines(
        db[!is_na_clone,,drop=FALSE],
        references,
        locus = "locus",
        nproc = params$nproc,
        seq = "sequence_alignment",
        v_call = "v_call",
        d_call = "d_call",
        j_call = "j_call",
        amino_acid = FALSE,
        id = "sequence_id",
        clone = "clone_id",
        v_germ_start = "v_germline_start",
        v_germ_end = "v_germline_end",
        v_germ_length = "v_germline_length",
        d_germ_start = "d_germline_start",
        d_germ_end = "d_germline_end",
        d_germ_length = "d_germline_length",
        j_germ_start = "j_germline_start",
        j_germ_end = "j_germline_end",
        j_germ_length = "j_germline_length",
        np1_length = "np1_length",
        np2_length = "np2_length",
        na.rm = TRUE,
        fields = params$cloneby
    )
}

db <- bind_rows(not_na_clone_db, na_clone_db) %>%
    arrange(tmp_nrow) %>%
    select(-tmp_nrow)

any_germ_fail <- pre_germ_size > nrow(db)

7710 sequences passed the germline reconstruction step and 0 failed.

Code
# Remove cells that after createGermlines
# have only light chains
db <- findLightOnlyCells(db, 
                         sample_id = "sample_id", cell_id = cell_id,
                         locus = "locus", fields = NULL)
num_light_only <- sum(db[["light_only_cell"]], na.rm = T)
if (num_light_only > 0) {
    # Remove the cells
    db <- db %>% dplyr::filter(!light_only_cell)
    cat(paste0("Removed ",num_light_only," sequences in light chain only cells."))
}
db[["light_only_cell"]] <- NULL
Code
# Add again light chains before observed mutations when mixture of sc + bulk or only bulk
if (nrow(db_light) > 0){
    db <- bind_rows(db, db_light)
}

4 Clone size distribution

Find your clone sizes table here. Most real datasets, will have most clones of size 1 (one sequence). Straight sequence count as a mesure of the size of the clones is not the best measure to compare clone size between samples due to possible disproportionate sampling. See the Clonal abundance section.

Description of terms:

  • clone_size_count: Clone size as sequence counts. In a sample (sample_id), the number of heavy chain sequences with the same clone_id.

  • clone_size_freq: Clone size as percent of the repertoire. clone_size_count divided by the number of heavy chain sequences in the sample (sample_id).

4.1 Number of clones (heavy chain, incl. singletons)

Code
# Add clone_size 
clone_sizes <- countClones(
            db %>% filter(isHeavyChain(locus)), # Keep heavy chains only
            groups=unique(c("sample_id", params$cloneby)))
Code
tmp <- eetable(clone_sizes, 
               caption=NULL, 
               outdir=params$outdir, file="clone_sizes_table")

db <- db %>%
   left_join(clone_sizes) %>%
   rename(
      clone_size_count = seq_count,
      clone_size_freq = seq_freq
   ) %>% 
   mutate_at(vars(starts_with("clone_size")), round,2)
Code
num_clones_table <- db %>%
   filter(isHeavyChain(locus)) %>% # Keep heavy chains only
   group_by(sample_id) %>%
   mutate(sequences=n()) %>%
   group_by(sample_id) %>%
   mutate(
      number_of_clones=length(unique(clone_id)),
   ) %>%
   group_by(!!!rlang::syms(unique(c("sample_id","sequences", params$cloneby, "number_of_clones")))) %>%
   summarize_at(vars(starts_with("clone_size")), list("min"=min, "median"=median, "max"=max)) %>%
   mutate_at(vars(starts_with("clone_size")), round, 2)

tab_caption <- "Summary of the number of clones, and clone size, per sample. Includes singletons (clone_size == 1)."
tab <- eetable(num_clones_table, caption=tab_caption, outdir=params$outdir, file="num_clones_table")
tab$table
Table 4.1: Summary of the number of clones, and clone size, per sample. Includes singletons (clone_size == 1). File can be found here: num_clones_table.tsv

4.2 Clone size distribution

Code
caption <- "Clone size distribution. Size is measured as number of heavy chain sequences belonging to the same clone."
clone_size_plot <- ggplot(clone_sizes, aes(x=seq_count, color=sample_id, fill=sample_id))+
    geom_bar() + theme_enchantr() +
    facet_wrap(~sample_id, scales = "free_y", ncol=3) +
    xlab("Clone size (Number of sequences per clone)")
clone_size_plot <- eeplot(clone_size_plot, 
       outdir=params$outdir, 
       file=knitr::opts_current$get('clone-size'),
       caption=caption
       )
ggplotly(clone_size_plot + theme(panel.spacing=unit(2, 'lines'), legend.position="right"))

Figure 4.1: Clone size distribution. Size is measured as number of heavy chain sequences belonging to the same clone. ggplot file: clone_size_plot.RData

Code
caption <- clone_size_plot$enchantr$html_caption

4.3 Clone size distribution without singletons

Code
caption <- "Clone size distribution, excluding singletons (subset to clone size > 1). Size is measured as number of heavy chain sequences belonging to the same clone."
clone_size_atleast2 <- ggplot(clone_sizes %>% filter(seq_count>1), aes(x=seq_count, color=sample_id, fill=sample_id))+
    geom_bar() + theme_enchantr() +
    facet_wrap(~sample_id, scales = "free_y", ncol=3) +
    xlab("Clone size (Sequences per clone)")
clone_size_atleast2 <- eeplot(clone_size_atleast2, 
       outdir=params$outdir, 
       file=knitr::opts_current$get('clone-size_atleast2'),
       caption=caption
       )
ggplotly(clone_size_atleast2 + theme(panel.spacing=unit(2, 'lines'), legend.position="right"))

Figure 4.2: Clone size distribution, excluding singletons (subset to clone size > 1). Size is measured as number of heavy chain sequences belonging to the same clone. ggplot file: clone_size_atleast2.RData

Code
caption <- clone_size_atleast2$enchantr$html_caption
Code
cat("Only singletons detected: there aren't clones of size>1.\n\n")

5 Clonal abundance

Code
# set empty default abundance
a <- new("AbundanceCurve")

Clonal abundance is the size of each clone (as a fraction of the entire repertoire). To correct for the different number of sequences in each of the samples, estimateAbundance estimates the clonal abundance distribution along with confidence intervals on these clone sizes using bootstrapping. 200 random bootstrap samples were taken, with size the number of sequences in the sample with less sequences (N). The y-axis shows the clone abundance (i.e., the size as a percent of the repertoire) and the x-axis is a rank of each clone, where the rank is sorted by size from larger (rank 1, left) to smaller (right). The shaded areas are confidence intervals.

Code
# calculate the rank-abundance curve
a <- estimateAbundance(db %>% filter(isHeavyChain(locus)),
                       group = "sample_id", min_n = params$min_n)
if (nrow(a@abundance)==0) {
    cat("\nAll groups failed to pass the threshold min_n=",params$min_n,". Skipping clonal abundance report.\n\n")
}
Code
# annotate
a@abundance <- a@abundance %>%
   left_join( db %>% 
                 select(any_of(c(unique(c("sample_id", 
                                                "subject_id",
                                                params$cloneby)))
                                       )
                  ) %>%
                 distinct(),
              
              )
p <- plotAbundanceCurve(a, annotate="depth", silent = T)
Code
tab <- eetable(a@abundance, file = "clonal_abundance", 
               outdir=params$outdir, show_max=10,
               caption="Example 10 lines of the clonal abundance file.")
tab$table %>%
        DT::formatRound(columns=c("p","p_sd","lower","upper"),digits=3)
Table 5.1: Example 10 lines of the clonal abundance file. File can be found here: clonal_abundance.tsv

5.1 Abundance plot by sample

Code
abundanceSample <- p + facet_wrap(~ subject_id + sample_id, ncol=3)
abundanceSample <- eeplot(abundanceSample, 
                     outdir=params$outdir, 
                     file=knitr::opts_current$get('abundanceSample'),
                     caption=paste0("Clonal abundance plot per `sample_id`. Clonal abundance was calculated with a sample of N=",
                      a@n,
                      " sequences with ",
                      a@nboot,
                      " boostrapping repetitions."),
                     a)
abundanceSample +
    theme(legend.position = "none")
Clonal abundance plot per `sample_id`. Clonal abundance was calculated with a sample of N=65 sequences with 200 boostrapping repetitions. <a href='ggplots/abundanceSample.RData'>ggplot file: abundanceSample.RData</a>

Figure 5.1: Clonal abundance plot per sample_id. Clonal abundance was calculated with a sample of N=65 sequences with 200 boostrapping repetitions. ggplot file: abundanceSample.RData

Code
caption <- abundanceSample$enchantr$html_caption  

6 Diversity

The clonal abundance distribution can be characterized using diversity statistics. Diversity scores (D) are calculated using the generalized diversity index (Hill numbers), which covers many different measures of diversity in a single function with a single varying parameter, the diversity order q.

The function alphaDiversity resamples the sequences (200 random bootstrapping events, with the number of sequences in the sample with less sequences (N)) and calculates diversity scores (D) over a interval of diversity orders (q). The diversity (D) is shown on the y-axis and the x-axis is the parameter q. - q = 0 corresponds to Species Richness - q = 1 corresponds to Shannon Entropy - q = 2 corresponds to Simpson Index

Inspection of this figure is useful to determine whether any difference in diversity between two repertoires depends on the statistic used or if it is a universal property.

The clonal diversity \(D\) of the repertoire was calculated according to the general formula of Hill Diversity numbers:

\[ \begin{aligned} ^{q}D = \left( \sum_{i=1}^Rp_i^q \right)^{1/(1-q)} \end{aligned} \]

where:

  • \(p_i\) is the proportion of unique sequences belonging to clone \(i\).
  • \(q\) are the values of the different diversity numbers.
  • \(R\) is the Richness, the number of different clones in the sample.

At \(q=1\) the function is undefined and the limit to zero equals the exponential of the Shannon Entropy:

\[ \begin{aligned} ^{1}D = exp \left( \sum_{i=1}^Rp_i ln(p_i) \right) \end{aligned} \]

The intuition about the different Hill Diversity values is the following:

  • At \(q=0\) the diversity index equals the number of clones in the sample.
  • At \(q=1\) the diversity index is the geometric mean of the clones in the sample, weighted by their proportion in the sample.
  • At \(q>1\) more weight is given to the clones with higher proportions in the sample.

6.1 Diversity curves

The following table shows the summary of the diversity calculations per sample.

Code
# generate the Hill diversity curve
d <- alphaDiversity(db %>% filter(isHeavyChain(locus)), 
                    group = "sample_id", 
                    min_n=params$min_n)

d@diversity <- d@diversity %>%
   left_join( db %>% 
                 select(any_of(c(unique(c("sample_id", 
                                                "subject_id", 
                                                params$cloneby)))
                                       )
                  ) %>%
                 distinct(),
              
              )

diversitySample <- plotDiversityCurve(d, silent = T, annotate="depth")

tab <- eetable(d@diversity, file = "clonal_diversity", 
               outdir=params$outdir, show_max=10,
               caption="Example 10 lines of the clonal diversity file.")
tab$table %>%
        DT::formatRound(columns=c("q","d","d_sd","d_lower", "d_upper", "e", "e_lower", "e_upper"),digits=3)
Table 6.1: Example 10 lines of the clonal diversity file. File can be found here: clonal_diversity.tsv

6.2 Diversity plot by sample

Code
# plot duplicated cells
diversitySample <- diversitySample + 
    geom_vline(xintercept = c(0,1,2), color = "grey50", linetype = "dashed") +
    facet_wrap(~sample_id + subject_id, ncol=3, scales = "free_x")
diversitySample <- eeplot(diversitySample, 
                     outdir=params$outdir, 
                     file=knitr::opts_current$get('label'),
                     caption=paste0("Clonal diversity per `sample_id`.Clonal diversity was calculated with a sample of N=",
                      d@n,
                      " sequences with ",
                      a@nboot,
                      " boostrapping repetitions."),
                     d)
diversitySample +
    theme(legend.position = "none")
Clonal diversity per `sample_id`.Clonal diversity was calculated with a sample of N=65 sequences with 200 boostrapping repetitions. <a href='ggplots/diversitySample.RData'>ggplot file: diversitySample.RData</a>

Figure 6.1: Clonal diversity per sample_id.Clonal diversity was calculated with a sample of N=65 sequences with 200 boostrapping repetitions. ggplot file: diversitySample.RData

Code
caption <- diversitySample$enchantr$html_caption

6.3 Diversity at different q

  • q=0 corresponds to Richness (number of different clones in the sample).
  • q=1 corresponds to Shannon diversity.
  • q=2 corresponds to Simpson diversity.
Code
# plot duplicated cells
div_data <- d@diversity %>% dplyr::filter(q==0 | q==1 | q==2)

div_at_q <- ggplot(div_data, aes(x=q, y=d, color=sample_id)) +
    geom_point() + theme_enchantr() +
    scale_x_continuous(breaks= c(0,1,2))
    
div_at_q <- eeplot(div_at_q, 
                     outdir=params$outdir, 
                     file=knitr::opts_current$get('label'),
                     caption="Diversity at different q values.",
                     div_at_q)
ggplotly(div_at_q)

(#fig:div_at_q)Diversity at different q values. ggplot file: div_at_q.RData

Code
caption <- div_at_q$enchantr$html_caption
Code
cat("# Mutation frequency\n")

7 Mutation frequency

Code
db <- shazam::observedMutations(db,
                                sequenceColumn = "sequence_alignment",
                                germlineColumn = "germline_alignment_d_mask",
                                frequency = T,
                                combine = T,
                                nproc = params$nproc)
Code
cat("Showing mutation frequency per c_gene only if the `c_call` column is present.\n")

Showing mutation frequency per c_gene only if the c_call column is present.

Code
cat("The mutation frequency per sequence is stored in the final dataframes in the `mu_freq` column.\n")

The mutation frequency per sequence is stored in the final dataframes in the mu_freq column.

Code
db_cgene <- db %>% filter(!is.na(c_gene), !(is.na(mu_freq)))
db_cgene$mu_freq <- as.numeric(db_cgene$mu_freq)

if (nrow(db_cgene) > 0) {
    mufreqSample <- ggplot(db_cgene, aes(x=c_gene, y=mu_freq, fill=c_gene)) + 
                    geom_boxplot() +
                    facet_wrap(~ subject_id + sample_id, ncol=3, scales = "free") +
                    theme_enchantr() +
                    theme(axis.text.x = element_text(angle = 45, hjust = 1))

    mufreqSample <- eeplot(mufreqSample, 
                        outdir=params$outdir, 
                        file='mutation_frequency_sample',
                        caption=paste0("Mutation frequency per C gene per `sample_id`."),
                        mufreqSample)
    print(mufreqSample +
        theme(legend.position = "none"))
    caption <- mufreqSample$enchantr$html_caption  
}
Mutation frequency per C gene per `sample_id`. <a href='ggplots/mutation_frequency_sample.RData'>ggplot file: mutation_frequency_sample.RData</a>

Figure 7.1: Mutation frequency per C gene per sample_id. ggplot file: mutation_frequency_sample.RData

Code
db_locus <- db %>% filter(!is.na(locus), !is.na(mu_freq))
db_locus$mu_freq <- as.numeric(db_locus$mu_freq)

if (nrow(db_locus) > 0) {
    mufreqSampleLocus <- ggplot(db_locus, aes(x=locus, y=mu_freq, fill=locus)) + 
                    geom_boxplot() +
                    facet_wrap(~ subject_id + sample_id, ncol=3, scales = "free") +
                    theme_enchantr() +
                    theme(axis.text.x = element_text(angle = 45, hjust = 1))

    mufreqSampleLocus <- eeplot(mufreqSampleLocus, 
                        outdir=params$outdir, 
                        file='mutation_frequency_sample_locus',
                        caption=paste0("Mutation frequency per locus per `sample_id`."),
                        mufreqSampleLocus)
    print(mufreqSampleLocus +
        theme(legend.position = "none"))
    caption <- mufreqSampleLocus$enchantr$html_caption 
}

8 Final repertoires and tables

Summary tables:

Find your final repertoires here:

9 Software versions

Code
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-redhat-linux-gnu
## Running under: Fedora Linux 40 (Container Image)
## 
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS-OPENMP;  LAPACK version 3.11.0
## 
## locale:
## [1] C
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] plotly_4.10.4         ComplexHeatmap_2.18.0 enchantr_0.1.19      
##  [4] dowser_2.3            scoper_1.3.0          shazam_1.2.0         
##  [7] alakazam_1.3.0        ggplot2_3.5.1         airr_1.5.0           
## [10] tidyr_1.3.1           dplyr_1.1.4           DT_0.33              
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3          rstudioapi_0.17.1          
##   [3] jsonlite_1.8.9              shape_1.4.6.1              
##   [5] magrittr_2.0.3              farver_2.1.2               
##   [7] rmarkdown_2.29              GlobalOptions_0.1.2        
##   [9] fs_1.6.5                    zlibbioc_1.52.0            
##  [11] vctrs_0.6.5                 memoise_2.0.1              
##  [13] Rsamtools_2.22.0            ggtree_3.14.0              
##  [15] htmltools_0.5.8.1           S4Arrays_1.6.0             
##  [17] progress_1.2.3              SparseArray_1.6.0          
##  [19] gridGraphics_0.5-1          sass_0.4.9                 
##  [21] KernSmooth_2.23-24          bslib_0.8.0                
##  [23] htmlwidgets_1.6.4           cachem_1.1.0               
##  [25] GenomicAlignments_1.42.0    igraph_2.1.1               
##  [27] lifecycle_1.0.4             iterators_1.0.14           
##  [29] pkgconfig_2.0.3             Matrix_1.7-1               
##  [31] R6_2.5.1                    fastmap_1.2.0              
##  [33] GenomeInfoDbData_1.2.13     MatrixGenerics_1.18.0      
##  [35] clue_0.3-66                 digest_0.6.37              
##  [37] aplot_0.2.3                 colorspace_2.1-1           
##  [39] patchwork_1.3.0             S4Vectors_0.44.0           
##  [41] crosstalk_1.2.1             GenomicRanges_1.58.0       
##  [43] labeling_0.4.3              phylotate_1.3              
##  [45] fansi_1.0.6                 httr_1.4.7                 
##  [47] polyclip_1.10-7             abind_1.4-8                
##  [49] compiler_4.4.2              bit64_4.5.2                
##  [51] withr_3.0.2                 doParallel_1.0.17          
##  [53] BiocParallel_1.40.0         viridis_0.6.5              
##  [55] ggforce_0.4.2               MASS_7.3-61                
##  [57] DelayedArray_0.32.0         rjson_0.2.23               
##  [59] tools_4.4.2                 ape_5.8                    
##  [61] quadprog_1.5-8              glue_1.8.0                 
##  [63] nlme_3.1-166                cluster_2.1.6              
##  [65] ade4_1.7-22                 generics_0.1.3             
##  [67] seqinr_4.2-36               gtable_0.3.6               
##  [69] tzdb_0.4.0                  data.table_1.16.2          
##  [71] hms_1.1.3                   tidygraph_1.3.1            
##  [73] utf8_1.2.4                  XVector_0.46.0             
##  [75] BiocGenerics_0.52.0         markdown_1.13              
##  [77] ggrepel_0.9.6               foreach_1.5.2              
##  [79] pillar_1.9.0                stringr_1.5.1              
##  [81] vroom_1.6.5                 yulab.utils_0.1.8          
##  [83] circlize_0.4.16             tweenr_2.0.3               
##  [85] treeio_1.30.0               lattice_0.22-6             
##  [87] bit_4.5.0                   tidyselect_1.2.1           
##  [89] Biostrings_2.74.0           knitr_1.49                 
##  [91] gridExtra_2.3               bookdown_0.41              
##  [93] IRanges_2.40.0              SummarizedExperiment_1.36.0
##  [95] stats4_4.4.2                xfun_0.49                  
##  [97] graphlayouts_1.2.1          Biobase_2.66.0             
##  [99] diptest_0.77-1              matrixStats_1.4.1          
## [101] stringi_1.8.4               UCSC.utils_1.2.0           
## [103] lazyeval_0.2.2              ggfun_0.1.7                
## [105] yaml_2.3.10                 evaluate_1.0.1             
## [107] codetools_0.2-20            ggraph_2.2.1               
## [109] tibble_3.2.1                ggplotify_0.1.2            
## [111] cli_3.6.3                   munsell_0.5.1              
## [113] jquerylib_0.1.4             Rcpp_1.0.13-1              
## [115] GenomeInfoDb_1.42.0         png_0.1-8                  
## [117] parallel_4.4.2              readr_2.1.5                
## [119] prettyunits_1.2.0           bitops_1.0-9               
## [121] phangorn_2.12.1             viridisLite_0.4.2          
## [123] tidytree_0.4.6              scales_1.3.0               
## [125] purrr_1.0.2                 crayon_1.5.3               
## [127] GetoptLong_1.0.5            rlang_1.1.4                
## [129] fastmatch_1.1-4